#!/usr/bin/env python

import urllib2
import pickle
import re

from urllib2 import urlopen

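# Date range of interest (presumably): July 2006 - June 2007.
# main() as written covers months 1-6 of 2007, i.e. the second half of that range.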

def main():
    """Scrape logopond.com gallery listing pages and accumulate detail ids.

    For each month 1-6 of 2007, every listing page is fetched and the numeric
    ids found in ``/gallery/detail/<id>`` links are collected.  The ids are
    merged with those already stored in ``results.pickle`` (an empty list is
    used when that file does not exist yet), de-duplicated, and written back
    to the same file.  Progress is printed to stdout.

    Raises:
        AttributeError: if the first page of a month contains no
            "page N of  M" marker from which to read the page count.
        IOError: if ``results.pickle`` exists but cannot be read, or cannot
            be written.
    """
    import errno  # local import: only needed for the missing-file check

    year = '2007'
    months = range(1, 7)
    url = 'http://logopond.com/gallery/?month=%d&year=%s&page=%d'
    # Raw strings keep the regex escapes (\d) away from Python's own
    # string-escape processing; the compiled patterns are unchanged.
    pages_exp = re.compile(r'page \d+ of  (\d+)')
    id_exp = re.compile(r'/gallery/detail/(\d+)')
    results_file = 'results.pickle'

    # NOTE(security): pickle.load can execute arbitrary code; this is only
    # acceptable because the file is expected to be produced by this script.
    try:
        with open(results_file, 'rb') as handle:
            results = pickle.load(handle)
    except IOError as exc:
        # Only a missing file means "first run"; anything else (permissions,
        # I/O failure) must not be masked, or existing data could be lost.
        if exc.errno != errno.ENOENT:
            raise
        results = []

    for month in months:
        num_pages = None  # unknown until the first page has been parsed
        cur = 0
        while True:
            cur += 1
            cururl = url % (month, year, cur)
            print(cururl)
            response = urlopen(cururl)
            try:
                text = response.read()
            finally:
                response.close()
            if num_pages is None:
                num_pages = int(pages_exp.search(text).group(1))
            ids = id_exp.findall(text)
            print('%d ids found' % (len(ids),))
            results += ids
            # ">=" rather than "==" so that a reported page count of 0 (or
            # anything below the current page) ends the loop instead of
            # requesting pages forever.
            if cur >= num_pages:
                break

    # Drop duplicate ids; the resulting order is arbitrary.
    results = list(set(results))
    with open(results_file, 'wb') as handle:
        pickle.dump(results, handle)
    print('%d ids saved' % (len(results),))


# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

